Looking into rat sightings vs. waste data

# Monthly DSNY tonnage per borough for 2016 through 2023.
# The raw `month` field is split on " / " into `year` and `month`, then refuse,
# paper and MGP tonnage are summed per year/month/borough. `year` and `month`
# stay character so they can be used as join keys against other tables that
# hold them as text; the three totals stay numeric.
waste <-
  read.csv("DSNY_Monthly_Tonnage_Data_20231202.csv") |>
  janitor::clean_names() |>
  separate(month, into = c("year", "month"), sep = " / ") |>
  # Compare years as numbers instead of relying on string ordering.
  filter(as.integer(year) >= 2016, as.integer(year) < 2024) |>
  group_by(year, month, borough) |>
  summarize(
    total_refuse = sum(refusetonscollected, na.rm = TRUE),
    total_paper = sum(papertonscollected, na.rm = TRUE),
    total_mgp = sum(mgptonscollected, na.rm = TRUE),
    .groups = "drop"
  ) |>
  # Normalise text columns only: lower-casing every column would convert the
  # numeric totals to strings.
  mutate(across(where(is.character), \(x) trimws(tolower(x))))
# Long-format tonnage: one row per year/month/borough/waste type.
# `type` is the total's column name without its "total_" prefix
# ("refuse", "paper", "mgp"); `tons` is the matching tonnage.
waste_2 <- waste |>
  pivot_longer(
    total_refuse:total_mgp,
    names_to = "type",
    names_prefix = "total_",
    values_to = "tons"
  ) |>
  # The totals may arrive as text, so coerce them to numbers here.
  mutate(tons = as.numeric(tons)) |>
  # Return an ungrouped table: grouping on every column (including the measured
  # value `tons`) would put each row in its own group and make every later
  # verb on this table run row by row.
  ungroup()
# Monthly rat-sighting counts per borough.
# Keeps sightings dated 2016-01-01 through 2023-10-31 whose ZIP code lies in
# (10000, 11697] and whose borough is known, then counts rows per
# year/month/borough into `ratcount`.
sightings <-
  read_csv("NYC_Rat_Sightings.csv") |>
  janitor::clean_names() |>
  # `created_date` is cut by character position: 1-2 month, 4-5 day,
  # 7-10 year, 12+ time. The single characters in between (e, f, g) are
  # separators and are thrown away.
  separate(
    created_date,
    into = c("month", "e", "day", "f", "year", "g", "time"),
    sep = c(2, 3, 5, 6, 10, 11)
  ) |>
  select(-e, -f, -g) |>
  # A numeric yyyymmdd key turns the date window into a plain range check.
  mutate(date = as.numeric(paste0(year, month, day))) |>
  filter(
    date >= 20160101, date <= 20231031,
    incident_zip > 10000, incident_zip <= 11697,
    !borough %in% c("Unspecified", NA)
  ) |>
  # Only the grouping keys need normalising; every other column is discarded
  # by the count below.
  mutate(across(c(year, month, borough), \(x) trimws(tolower(x)))) |>
  count(year, month, borough, name = "ratcount")
# Tonnage by waste type joined to the monthly rat counts on
# year/month/borough. Each rat count is repeated once per waste type.
# `combined_ym` is the year-month parsed into a date for time-series plots.
merged <- waste_2 |>
  inner_join(sightings, by = c("year", "month", "borough")) |>
  # Drop any grouping carried over from `waste_2` so the date parsing below is
  # vectorised over the whole table rather than run group by group.
  ungroup() |>
  mutate(
    tons = as.numeric(tons),
    combined_ym = ym(paste(year, month, sep = "-"))
  )

# Total tonnage (all waste types summed) per year/month/borough, alongside the
# rat count for that month. `ratcount` is part of the grouping key so it is
# carried through the summary unchanged.
merged_tons <- merged |>
  group_by(year, month, borough, ratcount) |>
  # Drop the groups explicitly so the result is a plain table and the date
  # parsing below runs once over all rows.
  summarize(total_tons = sum(tons), .groups = "drop") |>
  mutate(combined_ym = ym(paste(year, month, sep = "-")))

Visualizations

# Rat count by calendar month, one line per borough, one panel per year.
merged |>
  ggplot(aes(x = month, y = ratcount)) +
  geom_line(aes(color = borough, group = interaction(year, borough))) +
  facet_wrap(~ year) +
  labs(
    title = "Total Rat Count by Borough",
    x = "Month",
    y = "Rat Count",
    caption = "Data from NYC Open Data"
  )

# Rat count per borough across the whole period on a continuous date axis.
# Lines are grouped by borough only: grouping by year as well would cut every
# line at each December/January boundary.
ggplot(
  merged,
  aes(x = combined_ym, y = ratcount, color = borough, group = borough)
) +
  geom_line() +
  labs(
    title = "Total Rat Count by Borough",
    x = "Month",
    y = "Rat Count",
    caption = "Data from NYC Open Data"
  )

# Total tonnage per borough across the whole period on a continuous date axis.
# Lines are grouped by borough only: grouping by year as well would cut every
# line at each December/January boundary.
ggplot(
  merged_tons,
  aes(x = combined_ym, y = total_tons, color = borough, group = borough)
) +
  geom_line() +
  labs(
    title = "Total Tons of Trash by Borough",
    x = "Date",
    y = "Total Tons",
    caption = "Data from DSNY Monthly Tonnage Data"
  )

# Scatter of monthly tonnage against rat count, coloured by waste type.
# The y axis is limited to 0-85000 tons.
merged |>
  ggplot(aes(x = ratcount, y = tons)) +
  geom_point(aes(color = type), size = 2, alpha = 0.6) +
  scale_y_continuous(limits = c(0, 85000)) +
  labs(
    title = "Tons of Trash by Rat Count and Trash Type",
    x = "Rat Count",
    y = "Total Tons",
    color = "Type of Waste",
    caption = "Data from NYC Open Data and DSNY Monthly Tonnage Data"
  )

# Scatter of rat count against total monthly tonnage, coloured by borough.
# The y axis is limited to 0-1200 sightings.
merged_tons |>
  ggplot(aes(x = total_tons, y = ratcount)) +
  geom_point(aes(color = borough), size = 2, alpha = 0.6) +
  scale_y_continuous(limits = c(0, 1200)) +
  labs(
    title = "Rat Count by Total Tons of Trash",
    x = "Total Tons",
    y = "Rat Count",
    caption = "Data from NYC Open Data and DSNY Monthly Tonnage Data"
  )

# Rat count against total monthly tonnage, one panel per borough.
# Colour is mapped to borough, so the legend is titled accordingly.
ggplot(merged_tons, aes(x = total_tons, y = ratcount, color = borough)) +
  facet_wrap(borough ~ .) +
  geom_point(size = 1, alpha = 0.6) +
  ylim(0, 1200) +
  labs(
    title = "Rat Count by Total Tons of Waste",
    x = "Total Tons",
    y = "Rat Count",
    color = "Borough",
    caption = "Data from NYC Open Data and DSNY Monthly Tonnage Data"
  )

# Interactive scatter: rat count on x, total tonnage on y, coloured by
# borough. The hover text reports both values for the point.
merged_tons |>
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~ratcount,
    y = ~total_tons,
    color = ~borough,
    alpha = 0.5,
    text = ~str_c(
      "Tons of Trash: ", total_tons,
      "\nNumber of Rats: ", ratcount
    )
  )
# Interactive scatter: total tonnage on x, rat count on y, coloured by
# borough. The hover text reports both values for the point.
merged_tons |>
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~total_tons,
    y = ~ratcount,
    color = ~borough,
    alpha = 0.5,
    text = ~str_c(
      "Number of Rats: ", ratcount,
      "\nTons of Trash: ", total_tons
    )
  )